home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Aminet 5
/
Aminet 5 - March 1995.iso
/
Aminet
/
text
/
hyper
/
ag2html.lha
/
ag2html.pl
next >
Wrap
Perl Script
|
1994-11-29
|
11KB
|
258 lines
#!/usr/local/bin/perl
# <title> AG2HTML.pl </title>
# <h1> AG2HTML.pl Amiga Guide to HTML Converter</h1>
# <listing>
# <b>This code is (c) 1993,1994 to Michael Witbrock</b>
# <b>This code is © 1993,1994 to Michael Witbrock</b>
# <b>You may use it and modify and redistribute it freely,</b>
# <b>but you may not sell it in any way (including in disk collections)</b>
# <b>without first recieving my permission.</b>
# <b>Fred Fish, and the makers of the Aminet CD are granted that permission.
# <b>If you significantly improve it, please let me know so that I can</b>
# <b>use the new version.</b>
# <b>You may contact me as witbrock@cs.cmu.edu</b>
$VERSION = "2.941126c";
#http://www.cs.cmu.edu:8001/Web/People/mjw/Computer/Amiga/Perl/AG2HTML.pl
#is a URL to the latest version of this program.
# P.S. I know that this is redundant. It's my second perl script, and I don't
# yet know how to do subroutines. When I do, it will be both neater and shorter.
# P.P.S the reason it is preformatted is because many AG documents have button
# layouts which depend on this. Perhaps later I will make it smarter, so that
# it can recognise obvious text paragraphs and do the right thing.
# DONE: P.P.P.S handling some of the style flags, and the next, menu, prev, help, etc buttons
# is next, along with removing what can't be done (background colours etc).
#Changes: October 16 1993 MJW
# Allow email addresses.
# Translate & to & > to > < to <
#Changes: November the 9th 1993 MJW
# Allow some strange links that have strings after the link name
# found in Viewport
#Changes: March 12 1994 MJW
# Allow Link whereever I allow link -after Steve Gowdy's suggestion-
#Changes: April 22 1994 MJW
# Major rewrite?
# for Daniel Barrett, @{"mybutton" system "more myfile"} -> <A HREF="myfile">mybutton</A>
# THE next version will handle pictures in AmigaReport files automatically, but that may be a few days off.
# Picture conversions done
# Automatic inlining of pictures done for ones like the following
#UNHANDLED?: @{"Lynx" system "display.s 10,100,AR215_pic1.iff,Lynx"} is a user-friendly hypertext interface on UNIX and VMS
#UNHANDLED?: The @{"Commodore Amiga Information Resource" system "display.s 10,100,AR215_pic2.iff,CAIR"} is a collection
#UNHANDLED?: - @{"Amiga Report" system "display.s 10,100,AR215_pic3.iff,AR on the WWW"}
#Changes May 28th 1994
# Amiga report changed to display like this:
# @{" AR Logo " system "display.s 650 100 AR217_pic2.iff Amiga_Report"}
# Change it to handle that too.
# June 9th ---- Changed pointer to itself
# June 18th ---- version numbers, so I can tell people when it changes
# June 28th ---- try to tolower links so they work for gowdy
# Nov 23 1994 Fixed to work under perl 5, but bug handling multiple links
# on one line persists! I wish they wouldn't do that.
# (fixed for Waldemar Zurowski)
#
# Nov 26 1994 Incorporated Bilbo (Waldemar)'s changes to handle multiple links
# in a line properly. Thanks Waldemar!.
#In real : Waldemar Zurowski
#email internet: WALDEK@PLEARN.EDU.PL or bilbo@ci.pwr.wroc.pl
#<A HREF="http://sun10.ci.pwr.wroc.pl/~bilbo/">Bilbo</A>
#
# Nov 26 1994 b. Fixed strange links with commas
# UNHANDLED?: @{" North America ", link P6-3-3}
# Nov 26 1994 c. Tidied up IFF handling
sub handlenode{
while (<guidef>){
chop;
if (m/\@[eE][nN][dD][Nn][oO][dD][Ee]/){
# print "Found \@endnode\n"; # found @endnode
print pagef "</pre>\n"; #
if ($buttonline ne "<HR>") { print pagef "$buttonline"; }
print pagef "<HR>HTML Conversion by <a href=\"http://www.cs.cmu.edu:8001/Web/People/mjw/Computer/Amiga/Perl/AG2HTML.pl\"><i>AG2HTML.pl</i></a> V${VERSION}, perl $] & <a href=\"http://www.cs.cmu.edu:8001/Web/People/mjw/mjwhome.html\"><i>witbrock\@cs.cmu.edu</i></a>\n";
close (pagef); #
last; #
} else {
# Remember TOC link, and delete the line if found
if (m/\@[tT][oO][cC]\s*"?([^\"\s\}]*)"?/){
($tmp=$1) =~ y/A-Z/a-z/;
$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Contents]<\/a> ";
next; #
} #
# Remember prev link, and delete the line if found
if (m/\@[pP][rR][eE][vV]\s*"?([^\"\s\}]*)"?/){
($tmp =$1) =~ y/A-Z/a-z/;
$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Browse <-]<\/a> ";
next;
}
# Remember next link, and delete the line if found
if (m/\@[nN][eE][xX][tT]\s*"?([^\"\s\}]*)"?/){
($tmp =$1) =~ y/A-Z/a-z/;
$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Browse ->]<\/a> ";
next;
}
# Remember help link, and delete the line if found
if (m/\@[hH][eE][lL][pP]\s*"?([^\"\s\}]*)"?/){
($tmp =$1) =~ y/A-Z/a-z/;
$buttonline = $buttonline."<a href=\"${tmp}\.HTML\">[Help]<\/a> ";
next;
}
# otherwise look for more calls, links, or plain text
study;
s/&/&/g;
s/>/>/g;
s/</</g;
# Handle pictures (as inline maybe change eventually)
s/\@\{\s*\"([^\"]*)\"\s+[sS][yY][Ss][tT][eE][mM]\s+\"displa\S* [0-9,\s]*(\S*)\.iff[,\s]*([^\"\}]*)\"*\s*\}/<p>\<img src=\"$2\.gif\"\><br>$3<p>$1/g;
# for Daniel Barrett, \@{"mybutton" system "more myfile"} -> <A HREF="myfile">mybutton</A>
s/\@\{\s*\"([^\"]*)\"\s+[sS][yY][Ss][tT][eE][mM]\s+\"more *([^\"\s\}]*)\"*\s*\}/\<a href=\"$2\"\>$1<\/a>/;
#link - FROM BILBO: Here are my changes - I replaced 3 ifS with
# whileS and remove 'g' option after s/PATTERN/PATTERN/
while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\}/){
($link = $2) =~ y/A-Z/a-z/; #
s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\}/\<a href=\"${link}\.HTML\"\>$1<\/a>/; #HERE WAS 'G' OPTION
}
# found in viewport -- link with string after don't know what means
while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\"[^\"]*\"\s*\}/) {
($link = $2) =~ y/A-Z/a-z/; #
s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*\"[^\"]*\"\s*\}/\<a href=\"${link}\.HTML\"\>$1<\/a>/; #HERE WAS 'G' OPTION
}
# found in kingcon -- link with number after don't know what means
while (m/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*[0123456789]*\s*\}/) {
($link = $2) =~ y/A-Z/a-z/; #
s/\@\{\s*\"([^\"]*)\"[\s,]+[Ll][iI][nN][kK]\s+\"*([^\"\s\}]*)\"*\s*[0123456789]*\s*\}/\<a href=\"${link}\.HTML\"\>$1<\/a>/;#HERE WAS 'G' OPTION
}
s/\@\{[bB]\}/<B>/g; s/\@\{[uU][bB]\}/<\/B>/g; # bold
s/\@\{[iI]\}/<I>/g; s/\@\{[uU][iI]\}/<\/I>/g; #italic
if (m/\@\{/) { # Recognise and hide unhandled cases
print "UNHANDLED?: $_\n";
s/(\@\{[^\}]*\})/<!- Unhandled AmigaGuide(TM) sequence "$1">/g;
}
print pagef "$_\n";
}
}
}
die "Usage: %0 <AmigaGuideFile> $!\n N.B. This program puts AG node HTML files in a dir.\n"
unless ( $#ARGV == 0 );
$agname = $ARGV[0]; $_ = $agname;
if (/(.*)\.guide/){
$root = $1;
} else {
die "Error: $agname doesn't seem to be an AmigaGuide(TM) file.\n";
}
$dirname = $root."_Sections";
mkdir($dirname,oct(777)) unless (-e $dirname);
die "Couldn't make $dirname \n" unless -e $dirname;
open (guidef, $agname) || die "Can't open $agname: $!\n";
$databasefound=0;
# Move suitably labelled pictures to go with the file
@iffs = <${root}_*.iff>;
if (defined(@iffs)){
$ciff = $#iffs+1;
print "# Moving $ciff iff files: @iffs \n";
system ("mv ${root}_\*\.iff $dirname");
}
# Convert them to something mosaic can handle
opendir(sect,$dirname) || die "Can't read dir $dirname\n";
while ($_ = readdir(sect)){
next unless m#(.*)\.iff#;
print "Converting picture $1 to gif \n" unless -e "${dirname}/$1.gif";
print "Founded gif version of picture $1 \n" if -e "${dirname}/$1.gif";
system("ilbmtoppm < ${dirname}/$1.iff | ppmtogif > ${dirname}/$1.gif")
unless -e "${dirname}/$1.gif";
}
closedir(sect);
while (<guidef>) {
chop;
if ((m/\@[dD][Aa][Tt][Aa][bB][aA][sS][eE]\s*"(.*)"/)
|| (m/\@[dD][Aa][Tt][Aa][bB][aA][sS][eE]\s*(\S*)/)){
if ($databasefound != 0) {
print "IGNORED: database label $1 found after first one $database\n";
next;
}
$databasefound = 1;
$database=$1;
print "Database: $database\n";
while (<guidef>){
chop;
$nodetitle="Untitled";
$buttonline = "<HR>";
if ( (m/\@[Nn][oO][dD][Ee]\s*"(\S*)"\s*"(.*)"/)
|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)\s*"(.*)"/)
|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)/)
){
# found \@Node LABEL "title"
# or just \@node Label or even \@node "label" "title"
$nodename = $1;
$nodetitle = $2;
$htmlname = $dirname."/".$1.".HTML";
if (-e $htmlname) {
unlink($htmlname);
}
open (pagef,'>'.$htmlname)
|| die "Can't open $htmlname to write $!\n";
print pagef "<HTML>\n<TITLE>$nodetitle</TITLE>\n";
print pagef "<H1>$nodetitle</H1>\n<pre>\n";
last;
} else {
if (m/^.*\S.*$/) {
print "# SKIPPED while looking for \@node: $_ \n";
}
next;
}
} # Found first @node line
print "NOTE: main node is $htmlname\n";
# Now look for end of first node
&handlenode;
# found end of first node, or of the file
} else { # No database label on this line
if ($databasefound == 0){ # stuff before first @database is ignored
print "#SKIPPED while looking for database: $_\n";
next;
}
# otherwise, it may be a perfectly good line
# check whether it is the start of a new node
$nodetitle="Untitled";
$buttonline = "<HR>";
if ( (m/\@[Nn][oO][dD][Ee]\s*"(\S*)"\s*"(.*)"/)
|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)\s*"(.*)"/)
|| (m/\@[Nn][oO][dD][Ee]\s*(\S*)/)
){ # found @Node LABEL "title", @node Label, or even @node "label" "title"
$nodename = $1;
$nodetitle = $2;
$nodename =~ y/A-Z/a-z/;
$htmlname = $dirname."/".$nodename.".HTML";
unlink($htmlname) if -e $htmlname;
open (pagef,'>'.$htmlname)
|| die "Can't open $htmlname to write $!\n";
print pagef "<HTML>\n<TITLE>$nodetitle</TITLE>\n";
print pagef "<H1>$nodetitle</H1>\n<pre>\n";
# print "Found Node: $nodename $nodetitle as $htmlname \n";
# since we found the beginning of the node, copy to the end
&handlenode;
} else { # Haven't yet found a @node
if (m/^.*\S.*$/) {
print "# SKIPPED while looking for \@node: $_ \n";
}
next;
}
}
}
#
#
close guidef;
# </listing>